In [ ]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# Global plot configuration: FiveThirtyEight style sheet and a 16x12 inch
# default figure size for every figure drawn afterwards.
plt.style.use('fivethirtyeight')
matplotlib.rcParams['figure.figsize'] = (16, 12)

In [ ]:
# Read 'Tokyo_score_raw.txt' as a whitespace-delimited table, using the first
# column as the index. The separator is a raw string so that `\s+` reaches
# pandas as a regular expression instead of being an invalid Python escape
# sequence (which warns on modern Python versions).
emotion_score = pd.read_csv('Tokyo_score_raw.txt', index_col=0, sep=r'\s+')
# Cast the index to nanosecond-precision datetimes; the later resample('MS')
# call requires a datetime-like index.
emotion_score.index = emotion_score.index.astype('datetime64[ns]')
# Show the last rows as a quick sanity check of the load.
emotion_score.tail()

In [ ]:
# Read 'N225_data.txt' as a whitespace-delimited table, using the first column
# as the index. The separator is a raw string so that `\s+` reaches pandas as
# a regular expression instead of being an invalid Python escape sequence
# (which warns on modern Python versions).
stock = pd.read_csv('N225_data.txt', sep=r'\s+', index_col=0)
# Cast the index to nanosecond-precision datetimes; the later resample('MS')
# call requires a datetime-like index.
stock.index = stock.index.astype('datetime64[ns]')
# Show the last rows as a quick sanity check of the load.
stock.tail()

数据读入完毕,开始按照一定的时间段进行汇总并计算相关性

按照月来进行相关(之前有考虑过照片过少而导致的拟合的问题,现在先不考虑这个情况)。每月的照片数量可以用下面的语句统计:`count = emotion_score['emotion_score'].resample('MS').count()`


In [ ]:
# Average the emotion score inside month-start ('MS') bins.
gp1 = emotion_score['emotion_score'].resample('MS').mean()
miu1, std1 = gp1.mean(), gp1.std()
# Standardize the monthly means (subtract the mean, divide by the standard
# deviation). Normalization is done only after aggregation: without the
# aggregation step, the normalized curve fluctuates tightly along the axis.
gp11 = gp1.sub(miu1).div(std1)
gp11.tail()

In [ ]:
# Average the 'Close' column inside month-start ('MS') bins.
gp2 = stock['Close'].resample('MS').mean()
miu2, std2 = gp2.mean(), gp2.std()
# Standardize the monthly means: subtract the mean, divide by the standard
# deviation.
gp22 = gp2.sub(miu2).div(std2)
gp22.tail()

In [ ]:
# Place the two standardized monthly series side by side, keeping only the
# dates present in both (inner join). The emotion series is negated before
# joining, so both the plot and the coefficient below refer to the negated
# emotion score.
df = pd.concat([-gp11, gp22], join='inner', axis=1)
df.plot()
# Read the emotion/Close entry straight out of the pairwise correlation matrix.
corr_matrix = df.corr()
print("相关系数 为 {}".format(corr_matrix.loc['Close', 'emotion_score']))